
	use if year==2017 using "${clean_data}/panel_physicians_clean.dta", clear

	replace income_acs=. if observed_in_acs==0

	cap log close
	log using "${output}/descriptives/appendix_tax_vs_ACS.txt", replace text 

	* Mean income in tax data vs ACS data
	
	gunique(npi) 
	gunique(npi) if year_survey==2017
	gunique(npi) if year_survey==2017 & physician_acs==1
	gunique(npi) if year_survey==2017 & physician_acs==0
	
	sum ptotinc income_acs
	sum ptotinc income_acs if year_survey==2017
	sum ptotinc income_acs if year_survey==2017 & physician_acs==1
	sum ptotinc income_acs if year_survey==2017 & physician_acs==0
	
	* Conditioning on income being positive
	
	sum ptotinc if year_survey==2017 & physician_acs==1 & ptotinc>0
	sum income_acs if year_survey==2017 & physician_acs==1 & income_acs>0

	* Wage income only
	
	sum totw2comp w2wgs if year_survey==2017 & physician_acs==1 & ptotinc>0
	sum wag if year_survey==2017 & physician_acs==1 & income_acs>0	
	sum totw2comp if year_survey==2017 & physician_acs==1 & totw2comp>0
	sum w2wgs if year_survey==2017 & physician_acs==1 & w2wgs>0
	sum wag if year_survey==2017 & physician_acs==1 & wag>0

	* Self-employment income
	
	gen sem_total=sem+sem_sp
	replace sem_total=0.5*sem_total if spouse_doc==1
	
	sum pftloss sem_total if year_survey==2017 & physician_acs==1
	sum pftloss if year_survey==2017 & physician_acs==1 & pftloss>0
	sum sem_total if year_survey==2017 & physician_acs==1 & sem_total>0

	log close
	
	* Save estimates 

	frames reset
	frame create table1 
	frame create table23
	cap program drop gstats2dta
	
	program gstats2dta
	
		gstats tab `1' `2', s(mean count) pretty matasave(desc)
		mata: st_matrix("table", desc.output)
	
		preserve
		#d ;
			clear; svmat table ; g measure = "" ; ren (table1 table2) (avg_measure N) ; g condition = subinstr("`2'","if ","",.) ;
			foreach i of numl 1(1)`=wordcount("`1'")' { ; replace measure = "`:word `i' of `1''" if _n == `i' ; } ;
		#d cr
		tempfile est
		save `est'
		frame table23: append using `est'
		restore
	end
	
	use if year==2017 using "${clean_data}/panel_physicians_clean.dta", clear
	replace income_acs=. if observed_in_acs==0
	
	* Mean income in tax data vs ACS data
	
	forvalues i = 1/4 {
		
		if `i' == 1 	loc drop_condition ""
		if `i' == 2 	loc drop_condition "if year_survey==2017"
		if `i' == 3  	loc drop_condition "if year_survey==2017 & physician_acs==1"
		if `i' == 4 	loc drop_condition "if year_survey==2017 & physician_acs==0"
		
		gstats tab ptotinc income_acs `drop_condition', s(mean count) pretty matasave(desc`i')
		mata: st_matrix("table1`i'", desc`i'.output)
		
		* Save estimates 
		preserve 
			#d ;
			clear; svmat table1`i' ; g measure = "ptotinc" ; ren (table1`i'1 table1`i'2) (avg_measure N) ;replace measure = "income_acs" if _n == 2 ; g condition = subinstr("`drop_condition'","if ","",.) ;
			#d cr
			tempfile table1`i'
			save `table1`i''
			
			frame table1: append using `table1`i''
		restore
	} 
	
	* Conditioning on income being positive, wage income only, self-employment income
	gen sem_total=sem+sem_sp
	replace sem_total=0.5*sem_total if spouse_doc==1
	
	gstats2dta "totw2comp" "if year_survey==2017 & physician_acs==1 & ptotinc>0"
	gstats2dta "wag" "if year_survey==2017 & physician_acs==1 & income_acs>0"
	foreach var of varl totw2comp wag pftloss sem_total {
		gstats2dta "`var'" "if year_survey==2017 & physician_acs==1 & `var'>0"
	}	
	gstats2dta "pftloss sem_total" "if year_survey==2017 & physician_acs==1"
	
	* Export 
	frame table23 {
		tempfile table23
		save `table23'
	}
	
	frame change table1
	append using `table23'
	g N_UR = N
	drbest_docinc avg_measure 
	drbcount N , replace
	order N_UR, last
	export delimited using "$mypath/intermediate_csv/desc06-tax_acs_comparison.csv", replace dataf delim(tab) 